import jieba
from collections import Counter
import re
import requests
import json




def word_segment(text):
    """Segment Chinese text with jieba and return the list of words.

    All non-Chinese characters are stripped first.  As a side effect,
    the word-frequency table is written to doc/词频统计.txt, one
    "word,count" line per distinct word.

    Returns an empty list for input with no Chinese characters.
    """
    # Keep only CJK unified ideographs; punctuation, digits and Latin
    # text are discarded before segmentation.
    text = re.sub('[^\u4e00-\u9fa5]+', '', text)

    # cut_all=False is jieba's precise mode (the default).  Segment ONCE
    # and materialize the generator so the result can be reused for both
    # the frequency file and the return value.
    words = list(jieba.cut(text, cut_all=False))

    # Persist the word frequencies (side effect kept for callers that
    # rely on the file existing).
    counts = Counter(words)
    with open('doc//词频统计.txt', 'w', encoding='UTF-8') as fw:
        for word, freq in counts.items():
            fw.write("%s,%d\n" % (word, freq))

    return words

def seg(text):
    """Segment Chinese text via the remote SegTag service (no POS tags).

    Non-Chinese characters are stripped before the request.  Returns the
    list of segmented words.  Raises requests.HTTPError on a non-2xx
    response instead of failing obscurely while decoding the body.
    """
    # Hosted segmentation model endpoint.
    url = 'https://eae266ec46b040f9afb1ae22bef2676e.apig.cn-north-4.huaweicloudapis.com/v1/infers/240dd325-dfaf-4950-81a1-992f3aae0164/api/SegTag'
    # Keep only CJK characters; the service expects pure Chinese input.
    sour = re.sub('[^\u4e00-\u9fa5]+', '', text)
    # isTag=0 requests plain segmentation without part-of-speech tags.
    payload = json.dumps({"token": "test", "isTag": "0", "sour": sour}).encode('UTF-8')
    headers = {'Content-Type': 'application/json',
               'X-Apig-AppCode': '2fbd1dee3ec64bf3a35c860027f00d84faa45118659841f3a28153759f78e2cc'}
    r = requests.post(url, data=payload, headers=headers)
    # Fail loudly on HTTP errors rather than raising a confusing JSON
    # decode error from an HTML/error-body response.
    r.raise_for_status()
    # split() with no argument collapses ANY run of whitespace and drops
    # leading/trailing blanks; the old replace("  ", " ").split(" ")
    # left empty strings behind for odd-length space runs.
    return r.json()['segedSour'].split()

def judge(s):
    """Return 1 if *s* carries a noun, adjective or verb POS tag, else 0.

    Matches by substring, so derived tags (e.g. "/ns", "/vn") also count.
    """
    # Tags of interest: noun (/n), adjective (/a), verb (/v).
    return 1 if any(tag in s for tag in ("/n", "/a", "/v")) else 0


def seg_tag(text):
    """Segment and POS-tag text remotely, keeping nouns/adjectives/verbs.

    Non-Chinese characters are stripped before the request.  Returns the
    list of kept words with their "/tag" suffix removed.  Raises
    requests.HTTPError on a non-2xx response.
    """
    # Hosted segmentation model endpoint.
    url = 'https://eae266ec46b040f9afb1ae22bef2676e.apig.cn-north-4.huaweicloudapis.com/v1/infers/240dd325-dfaf-4950-81a1-992f3aae0164/api/SegTag'
    # Keep only CJK characters; the service expects pure Chinese input.
    sour = re.sub('[^\u4e00-\u9fa5]+', '', text)
    # isTag=1 requests part-of-speech tags appended as "word/tag".
    payload = json.dumps({"token": "test", "isTag": "1", "sour": sour}).encode('UTF-8')
    headers = {'Content-Type': 'application/json',
               'X-Apig-AppCode': '2fbd1dee3ec64bf3a35c860027f00d84faa45118659841f3a28153759f78e2cc'}
    r = requests.post(url, data=payload, headers=headers)
    # Fail loudly on HTTP errors rather than while decoding the body.
    r.raise_for_status()
    # split() handles any run of whitespace and never yields empty
    # tokens, unlike the old replace("  ", " ").split(" ") round-trip.
    tagged = r.json()['segedSour'].split()
    # Keep only noun/adjective/verb tokens (per judge) and strip the
    # "/tag" suffix from each surviving word.
    return [re.sub("/.*", "", tok) for tok in tagged if judge(tok)]



if __name__ == "__main__":
    # Demo driver: read the sample document and print its segmentation.
    # Guarded so importing this module no longer reads files or prints;
    # the redundant f.close() inside the old `with` block is dropped —
    # the context manager already closes the file.
    name = '1'
    with open('doc/' + name + ".txt", 'r', encoding='UTF-8') as f:
        text = f.read()
    segs = word_segment(text)
    print(segs)
